# Import pandas
import pandas as pd
# Load the three source tables: HIV diagnoses, AIDS diagnoses, and deaths.
hiv = pd.read_csv(r"C:\Users\Lyz\Documents\Python\Portfolio\HIV_AIDS\HIV.csv")
aids = pd.read_csv(r"C:\Users\Lyz\Documents\Python\Portfolio\HIV_AIDS\AIDS.csv")
death = pd.read_csv(r"C:\Users\Lyz\Documents\Python\Portfolio\HIV_AIDS\Death.csv")
# Combine the tables pairwise on the (County, Year) key. No `how` is given,
# so pandas' default join type applies to both merges.
# NOTE(review): the preview further down shows HIV_Denom as comma-formatted
# text while AIDS_Denom is numeric -- confirm whether the HIV file should be
# read with thousands="," before relying on HIV_Denom.
df = hiv.merge(aids, on=["County", "Year"])
HIV_AIDS = df.merge(death, on=["County", "Year"])
# Show every column name of the merged frame.
print(list(HIV_AIDS.columns))
['Year', 'County', 'HIV_Count', 'HIV_Denom', 'HIV_Rate', 'HIV_MOV', 'AIDS_Count', 'AIDS_Denom', 'AIDS_Rate', 'AIDS_MOV', 'Death_Count', 'Death_Rate']
# Preview the first 20 merged rows (displayed only in an interactive session).
HIV_AIDS.head(20)
# Drop the "Florida" rows: Florida is the state itself, not a county.
state = ["Florida"]
# `~` negates the boolean mask; this replaces the non-idiomatic `== False`.
HIV_AIDS = HIV_AIDS[~HIV_AIDS.County.isin(state)]
# Sort rows by ascending year.
HIV_AIDS = HIV_AIDS.sort_values("Year")
# Preview the first 5 rows after filtering and sorting.
HIV_AIDS.head(5)
| | Year | County | HIV_Count | HIV_Denom | HIV_Rate | HIV_MOV | AIDS_Count | AIDS_Denom | AIDS_Rate | AIDS_MOV | Death_Count | Death_Rate |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 649 | 2012 | Washington | 1 | 24,921 | 4.0 | NaN | 3 | 24921.0 | 12.0 | NaN | 0 | 0.0 |
| 614 | 2012 | Holmes | 2 | 19,997 | 10.0 | NaN | 2 | 19997.0 | 10.0 | NaN | 0 | 0.0 |
| 613 | 2012 | Hillsborough | 273 | 1,260,887 | 21.7 | 2.6 | 196 | 1260887.0 | 15.5 | 2.2 | 57 | 4.4 |
| 612 | 2012 | Highlands | 4 | 98,980 | 4.0 | NaN | 4 | 98980.0 | 4.0 | NaN | 5 | 4.7 |
| 611 | 2012 | Hernando | 7 | 173,214 | 4.0 | 3.0 | 9 | 173214.0 | 5.2 | 3.4 | 4 | 1.9 |
import plotly.graph_objects as go
# Grouped bar chart of the three rates per county, using every row of
# HIV_AIDS (all years together).
fig = go.Figure()
fig.add_trace(go.Bar(
    x=HIV_AIDS["County"],
    y=HIV_AIDS["HIV_Rate"],
    name="HIV Rate",
    marker_color='indianred'
))
fig.add_trace(go.Bar(
    x=HIV_AIDS["County"],
    y=HIV_AIDS["AIDS_Rate"],
    name="AIDs Rate",
    marker_color='lightsalmon'
))
fig.add_trace(go.Bar(
    x=HIV_AIDS["County"],
    y=HIV_AIDS["Death_Rate"],
    name="Death Rate",
    # Previously 'lightsalmon', identical to the AIDS trace, which made the
    # two series indistinguishable; use a distinct colour instead.
    marker_color='maroon'
))
# Place the three series side by side per county and tilt the county labels.
fig.update_layout(barmode="group", xaxis_tickangle=-45)
fig.show()
# With this code, we can't visualize by year
import plotly.graph_objects as go
import pandas as pd
# Normalise the 'Year' column: take the first four-digit run from its text
# form, then convert it to a datetime so the `.dt` accessor can be used.
# The pattern is a raw string: '\d' in a plain string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
year_text = HIV_AIDS['Year'].astype(str).str.extract(r'(\d{4})', expand=False)
HIV_AIDS['Year'] = pd.to_datetime(year_text.astype(int), format='%Y')
# Distinct calendar years present in the data, in ascending order.
years = sorted(HIV_AIDS['Year'].dt.year.unique())
# Create a function to filter data by year
def filter_data_by_year(data: pd.DataFrame, year: int) -> pd.DataFrame:
    """Return the rows of *data* whose 'Year' falls in calendar year *year*.

    Args:
        data: Frame with a datetime-typed 'Year' column (it is read through
            the ``.dt`` accessor).
        year: Calendar year to keep, e.g. ``2012``.

    Returns:
        The matching rows, selected with a boolean mask; empty when no row
        matches.
    """
    return data[data['Year'].dt.year == year]
# Create a slider: one step per year, each step swapping the plotted data
# for that year's rows via a "restyle" update.
steps = []
for year in years:
    filtered_data = filter_data_by_year(HIV_AIDS, year)
    step = dict(
        method="restyle",
        # A single x array is given alongside three y arrays (HIV, AIDS,
        # Death). NOTE(review): presumably Plotly reuses the one x array for
        # all three traces -- confirm against the restyle documentation.
        args=[{"x": [filtered_data["County"]],
               "y": [filtered_data["HIV_Rate"],
                     filtered_data["AIDS_Rate"],
                     filtered_data["Death_Rate"]]}],
        label=str(year)
    )
    steps.append(step)
# Slider configuration: starts on the first step and shows "Year: <label>".
slider = dict(
    active=0,
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)
# Rows for the earliest year; these populate the chart before the slider
# is moved.
initial_data = filter_data_by_year(HIV_AIDS, years[0])
# (column, legend label, bar colour) for each of the three series, in the
# order the traces are added.
trace_specs = (
    ("HIV_Rate", "HIV Rate", 'blue'),
    ("AIDS_Rate", "AIDs Rate", 'mediumaquamarine'),
    ("Death_Rate", "Death Rate", 'green'),
)
fig = go.Figure()
for column, label, color in trace_specs:
    fig.add_trace(go.Bar(
        x=initial_data["County"],
        y=initial_data[column],
        name=label,
        marker_color=color
    ))
# Group the series per county, tilt the county labels, and attach the slider.
fig.update_layout(
    barmode="group",
    xaxis_tickangle=-45,
    sliders=[slider]
)
fig.show()